library("knitr") # for knitting RMarkdown
library("kableExtra") # for making nice tables
library("janitor") # for cleaning column names
library("modelr") # for doing modeling stuff
library("tidybayes") # tidying up results from Bayesian models
library("brms") # Bayesian regression models with Stan
library("rstanarm") # for Bayesian models
library("cowplot") # for making figure panels
library("ggrepel") # for labels in ggplots
library("gganimate") # for animations
library("GGally") # for pairs plot
library("bayesplot") # for visualization of Bayesian model fits
library("tidyverse") # for wrangling, plotting, etc.
theme_set(
theme_classic() + #set the theme
theme(text = element_text(size = 20)) #set the default text size
)
Load the poker data set.
df.poker = read_csv("data/poker.csv") %>%
mutate(skill = factor(skill,
levels = 1:2,
labels = c("expert", "average")),
skill = fct_relevel(skill, "average", "expert"),
hand = factor(hand,
levels = 1:3,
labels = c("bad", "neutral", "good")),
limit = factor(limit,
levels = 1:2,
labels = c("fixed", "none")),
participant = 1:n()) %>%
select(participant, everything())
Parsed with column specification:
cols(
skill = col_double(),
hand = col_double(),
limit = col_double(),
balance = col_double()
)
Let’s visualize the data first:
df.poker %>%
ggplot(mapping = aes(x = hand,
y = balance,
fill = hand)) +
geom_point(alpha = 0.2,
position = position_jitter(height = 0, width = 0.1)) +
stat_summary(fun.data = "mean_cl_boot",
geom = "linerange",
size = 1) +
stat_summary(fun.y = "mean",
geom = "point",
shape = 21,
size = 4) +
labs(y = "final balance (in Euros)") +
scale_fill_manual(values = c("red", "orange", "green")) +
theme(legend.position = "none")
And let’s now fit a simple (frequentist) regression model:
fit.lm = lm(formula = balance ~ 1 + hand,
data = df.poker)
fit.lm %>% summary()
Call:
lm(formula = balance ~ 1 + hand, data = df.poker)
Residuals:
Min 1Q Median 3Q Max
-12.9264 -2.5902 -0.0115 2.6573 15.2834
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.9415 0.4111 14.451 < 2e-16 ***
handneutral 4.4051 0.5815 7.576 4.55e-13 ***
handgood 7.0849 0.5815 12.185 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 4.111 on 297 degrees of freedom
Multiple R-squared: 0.3377, Adjusted R-squared: 0.3332
F-statistic: 75.7 on 2 and 297 DF, p-value: < 2.2e-16
Now, let’s fit a Bayesian regression model using the brm() function:
fit.brm1 = brm(formula = balance ~ 1 + hand,
data = df.poker,
file = "brm1")
fit.brm1 %>% summary()
Family: gaussian
Links: mu = identity; sigma = identity
Formula: balance ~ 1 + hand
Data: df.poker (Number of observations: 300)
Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
total post-warmup samples = 4000
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
Intercept 5.93 0.41 5.12 6.72 2954 1.00
handneutral 4.41 0.58 3.30 5.55 3488 1.00
handgood 7.10 0.58 5.99 8.29 3476 1.00
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
sigma 4.12 0.17 3.81 4.46 3647 1.00
Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
is a crude measure of effective sample size, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
I use the file = argument to save the model’s results so that when I run this code chunk again, the model doesn’t need to be fit again (fitting Bayesian models takes a while …).
Let’s visualize what the posterior for the different parameters looks like. We use the geom_halfeyeh() function from the “tidybayes” package to do so:
fit.brm1 %>%
posterior_samples() %>%
select(-lp__) %>%
gather("variable", "value") %>%
ggplot(data = .,
mapping = aes(y = variable, x = value)) +
geom_halfeyeh()
And let’s look at how the samples from the posterior are correlated with each other:
fit.brm1 %>%
posterior_samples() %>%
select(b_Intercept:sigma) %>%
ggpairs(lower = list(continuous = wrap("points", alpha = 0.03)),
upper = list(continuous = wrap("cor", size = 6))) +
theme(panel.grid.major = element_blank(),
text = element_text(size = 12))
To compute the MAP (maximum a posteriori probability) estimate and highest density interval, we use the mode_hdi() function that comes with the “tidybayes” package.
fit.brm1 %>%
posterior_samples() %>%
clean_names() %>%
select(starts_with("b_"), sigma) %>%
mode_hdi() %>%
gather("index", "value", -c(.width:.interval)) %>%
select(index, value) %>%
mutate(index = ifelse(str_detect(index, fixed(".")), index, str_c(index, ".mode"))) %>%
separate(index, into = c("parameter", "type"), sep = "\\.") %>%
spread(type, value) %>%
kable(digits = 2) %>%
kable_styling(bootstrap_options = "striped",
full_width = F)
| parameter | lower | mode | upper |
|---|---|---|---|
| b_handgood | 5.95 | 7.07 | 8.21 |
| b_handneutral | 3.29 | 4.46 | 5.54 |
| b_intercept | 5.13 | 6.02 | 6.73 |
| sigma | 3.81 | 4.11 | 4.46 |
To check whether the model did a good job capturing the data, we can simulate what future data the Baysian model predicts, now that it has learned from the data we feed into it.
pp_check(fit.brm1, nsamples = 100)
This looks good! The predicted shaped of the data based on samples from the posterior distribution looks very similar to the shape of the actual data.
Let’s make a hypothetical outcome plot that shows what concrete data sets the model would predict:
# generate predictive samples
df.predictive_samples = fit.brm1 %>%
posterior_samples() %>%
clean_names() %>%
select(contains("b_"), sigma) %>%
sample_n(size = 20) %>%
mutate(sample = 1:n()) %>%
group_by(sample) %>%
nest() %>%
mutate(bad = map(data, ~ .$b_intercept + rnorm(100, sd = .$sigma)),
neutral = map(data, ~ .$b_intercept + .$b_handneutral + rnorm(100, sd = .$sigma)),
good = map(data, ~ .$b_intercept + .$b_handgood + rnorm(100, sd = .$sigma))) %>%
unnest(bad, neutral, good)
# plot the results as an animation
p = df.predictive_samples %>%
gather("hand", "balance", -sample) %>%
mutate(hand = factor(hand, levels = c("bad", "neutral", "good"))) %>%
ggplot(mapping = aes(x = hand,
y = balance,
fill = hand)) +
geom_point(alpha = 0.2,
position = position_jitter(height = 0, width = 0.1)) +
stat_summary(fun.data = "mean_cl_boot",
geom = "linerange",
size = 1) +
stat_summary(fun.y = "mean",
geom = "point",
shape = 21,
size = 4) +
labs(y = "final balance (in Euros)") +
scale_fill_manual(values = c("red", "orange", "green")) +
theme(legend.position = "none") +
transition_manual(sample)
animate(p, nframes = 120, width = 800, height = 600, res = 96, type = "cairo")
# anim_save("poker_posterior_predictive.gif")
One key advantage of Bayesian over frequentist analysis is that we can test hypothesis in a very flexible manner by directly probing our posterior samples in different ways.
We may ask, for example, what the probability is that the parameter for the difference between a bad hand and a neutral hand (b_handneutral) is greater than 0. Let’s plot the posterior distribution together with the criterion:
fit.brm1 %>%
posterior_samples() %>%
select(b_handneutral) %>%
gather("variable", "value") %>%
ggplot(data = .,
mapping = aes(y = variable, x = value)) +
geom_halfeyeh() +
geom_vline(xintercept = 0,
color = "red")
We see that the posterior is definitely greater than 0.
We can ask many different kinds of questions about the data by doing basic arithmetic on our posterior samples. The hypothesis() function makes this even easier. Here are some examples:
# the probability that the posterior for handneutral is less than 0
hypothesis(fit.brm1,
hypothesis = "handneutral < 0")
Hypothesis Tests for class b:
Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
1 (handneutral) < 0 4.41 0.58 -Inf 5.35 0
Post.Prob Star
1 0
---
'*': The expected value under the hypothesis lies outside the 95%-CI.
Posterior probabilities of point hypotheses assume equal prior probabilities.
# the probability that the posterior for handneutral is greater than 4
hypothesis(fit.brm1,
hypothesis = "handneutral > 4")
Hypothesis Tests for class b:
Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
1 (handneutral)-(4) > 0 0.41 0.58 -0.52 Inf 3.05
Post.Prob Star
1 0.75
---
'*': The expected value under the hypothesis lies outside the 95%-CI.
Posterior probabilities of point hypotheses assume equal prior probabilities.
# the probability that good hands make twice as much as bad hands
hypothesis(fit.brm1,
hypothesis = "Intercept + handgood > 2 * Intercept")
Hypothesis Tests for class b:
Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
1 (Intercept+handgo... > 0 1.18 0.93 -0.29 Inf 8.5
Post.Prob Star
1 0.89
---
'*': The expected value under the hypothesis lies outside the 95%-CI.
Posterior probabilities of point hypotheses assume equal prior probabilities.
# the probability that neutral hands make less than the average of bad and good hands
hypothesis(fit.brm1,
hypothesis = "Intercept + handneutral < (Intercept + Intercept + handgood) / 2")
Hypothesis Tests for class b:
Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
1 (Intercept+handne... < 0 0.86 0.5 -Inf 1.68 0.04
Post.Prob Star
1 0.04
---
'*': The expected value under the hypothesis lies outside the 95%-CI.
Posterior probabilities of point hypotheses assume equal prior probabilities.
Let’s double check one example, and calculate the result directly based on the posterior samples:
df.hypothesis = fit.brm1 %>%
posterior_samples() %>%
clean_names() %>%
select(starts_with("b_")) %>%
mutate(neutral = b_intercept + b_handneutral,
bad_good_average = (b_intercept + b_intercept + b_handgood)/2,
hypothesis = neutral < bad_good_average)
df.hypothesis %>%
summarize(p = sum(hypothesis)/n())
p
1 0.03975
Another way of testing hypothesis is via the Bayes factor. Let’s fit the two models we are interested in comparing with each other:
fit.brm2 = brm(formula = balance ~ 1 + hand,
data = df.poker,
save_all_pars = T,
file = "brm2")
fit.brm3 = brm(formula = balance ~ 1 + hand + skill,
data = df.poker,
save_all_pars = T,
file = "brm3")
And then compare the models useing the bayes_factor() function:
bayes_factor(fit.brm3, fit.brm2)
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Estimated Bayes factor in favor of bridge1 over bridge2: 3.82262
So far, we have used the defaults that brm() comes with and not bothered about specifiying the priors, etc.
Notice that we didn’t specify any priors in the model. By default, “brms” assigns weakly informative priors to the parameters in the model. We can see what these are by running the following command:
fit.brm1 %>%
prior_summary()
prior class coef group resp dpar nlpar bound
1 b
2 b handgood
3 b handneutral
4 student_t(3, 10, 10) Intercept
5 student_t(3, 0, 10) sigma
We can also get information about which priors need to be specified before fitting a model:
get_prior(formula = balance ~ 1 + hand,
family = "gaussian",
data = df.poker)
prior class coef group resp dpar nlpar bound
1 b
2 b handgood
3 b handneutral
4 student_t(3, 10, 10) Intercept
5 student_t(3, 0, 10) sigma
Here is an example for what a more complete model specification could look like:
fit.brm4 = brm(
formula = balance ~ 1 + hand,
family = "gaussian",
data = df.poker,
prior = c(
prior(normal(0, 10), class = "b", coef = "handgood"),
prior(normal(0, 10), class = "b", coef = "handneutral"),
prior(student_t(3, 3, 10), class = "Intercept"),
prior(student_t(3, 0, 10), class = "sigma")
),
inits = list(
list(Intercept = 0, sigma = 1, handgood = 5, handneutral = 5),
list(Intercept = -5, sigma = 3, handgood = 2, handneutral = 2),
list(Intercept = 2, sigma = 1, handgood = -1, handneutral = 1),
list(Intercept = 1, sigma = 2, handgood = 2, handneutral = -2)
),
iter = 4000,
warmup = 1000,
chains = 4,
file = "brm4",
seed = 1
)
fit.brm4 %>% summary()
Family: gaussian
Links: mu = identity; sigma = identity
Formula: balance ~ 1 + hand
Data: df.poker (Number of observations: 300)
Samples: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
total post-warmup samples = 12000
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
Intercept 5.96 0.41 5.15 6.76 9191 1.00
handneutral 4.37 0.58 3.23 5.53 9629 1.00
handgood 7.05 0.58 5.93 8.19 9778 1.00
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
sigma 4.13 0.17 3.81 4.49 12855 1.00
Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
is a crude measure of effective sample size, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
We can also take a look at the Stan code that the brm() function creates:
fit.brm4 %>% stancode()
// generated with brms 2.7.0
functions {
}
data {
int<lower=1> N; // total number of observations
vector[N] Y; // response variable
int<lower=1> K; // number of population-level effects
matrix[N, K] X; // population-level design matrix
int prior_only; // should the likelihood be ignored?
}
transformed data {
int Kc = K - 1;
matrix[N, K - 1] Xc; // centered version of X
vector[K - 1] means_X; // column means of X before centering
for (i in 2:K) {
means_X[i - 1] = mean(X[, i]);
Xc[, i - 1] = X[, i] - means_X[i - 1];
}
}
parameters {
vector[Kc] b; // population-level effects
real temp_Intercept; // temporary intercept
real<lower=0> sigma; // residual SD
}
transformed parameters {
}
model {
vector[N] mu = temp_Intercept + Xc * b;
// priors including all constants
target += normal_lpdf(b[1] | 0, 10);
target += normal_lpdf(b[2] | 0, 10);
target += student_t_lpdf(temp_Intercept | 3, 3, 10);
target += student_t_lpdf(sigma | 3, 0, 10)
- 1 * student_t_lccdf(0 | 3, 0, 10);
// likelihood including all constants
if (!prior_only) {
target += normal_lpdf(Y | mu, sigma);
}
}
generated quantities {
// actual population-level intercept
real b_Intercept = temp_Intercept - dot_product(means_X, b);
}
One thing worth noticing: by default, “brms” centers the predictors which makes it easier to assign a default prior over the intercept.
So far, we’ve assumed that the inference has worked out. We can check this by running plot() on our brm object:
plot(fit.brm1)
Let’s make our own version of a trace plot for one parameter in the model:
fit.brm1 %>%
spread_draws(b_Intercept) %>%
clean_names() %>%
mutate(chain = as.factor(chain)) %>%
ggplot(aes(x = iteration, y = b_intercept, group = chain, color = chain)) +
geom_line()
We can also take a look at the auto-correlation plot. Ideally, we want to generate independent samples from the posterior. So we don’t want subsequent samples to be strongly correlated with each other. Let’s take a look:
variables = fit.brm1 %>% get_variables() %>% .[1:4]
fit.brm1 %>%
posterior_samples() %>%
mcmc_acf(pars = variables,
lags = 4)
Looking good! The autocorrelation should become very small as the lag increases (indicating that we are getting independent samples from the posterior).
Let’s try to fit a model to very little data (just two observations) with extremely uninformative priors:
df.data = tibble(y = c(-1, 1))
fit.brm5 = brm(
data = df.data,
family = gaussian,
formula = y ~ 1,
prior = c(
prior(uniform(-1e10, 1e10), class = Intercept),
prior(uniform(0, 1e10), class = sigma)
),
inits = list(
list(Intercept = 0, sigma = 1),
list(Intercept = 0, sigma = 1)
),
iter = 4000,
warmup = 1000,
chains = 2,
file = "brm5"
)
Let’s take a look at the posterior distributions of the model parameters:
summary(fit.brm5)
Warning: The model has not converged (some Rhats are > 1.1). Do not analyse the results!
We recommend running more iterations and/or setting stronger priors.
Warning: There were 1203 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help.
See http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
Family: gaussian
Links: mu = identity; sigma = identity
Formula: y ~ 1
Data: df.data (Number of observations: 2)
Samples: 2 chains, each with iter = 4000; warmup = 1000; thin = 1;
total post-warmup samples = 6000
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI
Intercept 357550121.58 1416057299.71 -2244033111.47 3333594132.43
Eff.Sample Rhat
Intercept 5 2.15
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
sigma 1524412740.64 2392424321.98 21668.93 8317582240.06 3 1.41
Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
is a crude measure of effective sample size, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
Not looking good – The estimates and credible intervals are off the charts. And the effective samples sizes in the chains are very small.
Let’s visualize the trace plots:
plot(fit.brm5)
fit.brm5 %>%
spread_draws(b_Intercept) %>%
clean_names() %>%
mutate(chain = as.factor(chain)) %>%
ggplot(aes(x = iteration, y = b_intercept, group = chain, color = chain)) +
geom_line()
Given that we have so little data in this case, we need to help the model a little bit by providing some slighlty more specific priors.
fit.brm6 = brm(
data = df.data,
family = gaussian,
formula = y ~ 1,
prior = c(
prior(normal(0, 10), class = Intercept), # more reasonable priors
prior(cauchy(0, 1), class = sigma)
),
iter = 4000,
warmup = 1000,
chains = 2,
seed = 1,
file = "brm6"
)
Let’s take a look at the posterior distributions of the model parameters:
summary(fit.brm6)
Warning: There were 3 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help.
See http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
Family: gaussian
Links: mu = identity; sigma = identity
Formula: y ~ 1
Data: df.data (Number of observations: 2)
Samples: 2 chains, each with iter = 4000; warmup = 1000; thin = 1;
total post-warmup samples = 6000
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
Intercept -0.13 1.69 -4.10 3.06 881 1.00
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
sigma 2.04 1.88 0.61 6.95 1152 1.00
Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
is a crude measure of effective sample size, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
This looks much better. There is still quite a bit of uncertainty in our paremeter estimates, but it has reduced dramatically.
Let’s visualize the trace plots:
plot(fit.brm6)
fit.brm6 %>%
spread_draws(b_Intercept) %>%
clean_names() %>%
mutate(chain = as.factor(chain)) %>%
ggplot(aes(x = iteration, y = b_intercept, group = chain, color = chain)) +
geom_line()
Looking mostly good – except for one hiccup on sigma …
Let’s generate some fake developmental data where the variance in the data is greatest for young children, smaller for older children, and even smaller for adults:
# make example reproducible
set.seed(0)
df.variance = tibble(
group = rep(c("3yo", "5yo", "adults"), each = 20),
response = rnorm(60, mean = rep(c(0, 5, 8), each = 20), sd = rep(c(3, 1.5, 0.3), each = 20))
)
df.variance %>%
ggplot(aes(x = group, y = response)) +
geom_jitter(height = 0,
width = 0.1,
alpha = 0.7)
While frequentist models (such as a linear regression) assume equality of variance, Baysian models afford us with the flexibility of inferring both the parameter estimates of the groups (i.e. the means and differences between the means), as well as the variances.
We simply define a multivariate model which tries to fit both the response as well as the variance sigma:
fit.brm7 = brm(
formula = bf(response ~ group,
sigma ~ group),
data = df.variance,
file = "brm7"
)
Let’s take a look at the model output:
summary(fit.brm7)
Family: gaussian
Links: mu = identity; sigma = log
Formula: response ~ group
sigma ~ group
Data: df.variance (Number of observations: 60)
Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
total post-warmup samples = 4000
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
Intercept 0.00 0.72 -1.38 1.45 1273 1.00
sigma_Intercept 1.15 0.17 0.85 1.51 2033 1.00
group5yo 5.16 0.77 3.60 6.62 1424 1.00
groupadults 7.96 0.72 6.49 9.37 1276 1.00
sigma_group5yo -1.05 0.23 -1.51 -0.59 2355 1.00
sigma_groupadults -2.19 0.23 -2.65 -1.74 2231 1.00
Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
is a crude measure of effective sample size, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
And let’s visualize the results:
df.variance %>%
expand(group) %>%
add_fitted_draws(fit.brm7, dpar = TRUE) %>%
select(group, .row, .draw, posterior = .value, mu, sigma) %>%
gather("index", "value", c(mu, sigma)) %>%
ggplot(aes(x = value, y = group)) +
geom_halfeyeh() +
geom_vline(xintercept = 0, linetype = "dashed") +
facet_grid(cols = vars(index))
This plot shows what the posterior looks like for both mu (the inferred means), and for sigma (the inferred variances) for the different groups.
For more information, see this tutorial.
While running an ordinal regression is far from trivial in frequentist world, it’s easy to do using “brms”.
Let’s load the cars data and turn the number of cylinders into an ordered factor:
df.cars = mtcars %>%
mutate(cyl = ordered(cyl)) # creates an ordered factor
Let’s check that the cylinders are indeed ordered now:
df.cars %>% str()
'data.frame': 32 obs. of 11 variables:
$ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
$ cyl : Ord.factor w/ 3 levels "4"<"6"<"8": 2 2 1 2 3 2 3 1 1 2 ...
$ disp: num 160 160 108 258 360 ...
$ hp : num 110 110 93 110 175 105 245 62 95 123 ...
$ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
$ wt : num 2.62 2.88 2.32 3.21 3.44 ...
$ qsec: num 16.5 17 18.6 19.4 17 ...
$ vs : num 0 0 1 1 0 1 0 1 1 1 ...
$ am : num 1 1 1 0 0 0 0 0 0 0 ...
$ gear: num 4 4 4 3 3 3 3 4 4 4 ...
$ carb: num 4 4 1 1 2 1 4 2 2 4 ...
fit.brm8 = brm(formula = cyl ~ mpg,
data = df.cars,
family = "cumulative",
file = "brm8",
seed = 1)
Visualize the results:
data_plot = df.cars %>%
ggplot(aes(x = mpg, y = cyl, color = cyl)) +
geom_point() +
scale_color_brewer(palette = "Dark2", name = "cyl")
fit_plot = df.cars %>%
data_grid(mpg = seq_range(mpg, n = 101)) %>%
add_fitted_draws(fit.brm8, value = "P(cyl | mpg)", category = "cyl") %>%
ggplot(aes(x = mpg, y = `P(cyl | mpg)`, color = cyl)) +
stat_lineribbon(aes(fill = cyl),
alpha = 1/5,
.width = c(0.95)) +
scale_color_brewer(palette = "Dark2") +
scale_fill_brewer(palette = "Dark2")
plot_grid(ncol = 1, align = "v",
data_plot,
fit_plot
)
Posterior predictive check:
df.cars %>%
select(mpg) %>%
add_predicted_draws(fit.brm8, prediction = "cyl", seed = 1234) %>%
ggplot(aes(x = mpg, y = cyl)) +
geom_count(color = "gray75") +
geom_point(aes(fill = cyl),
data = df.cars,
shape = 21,
size = 2) +
scale_fill_brewer(palette = "Dark2") +
geom_label_repel(
data = . %>% ungroup() %>% filter(cyl == "8") %>% filter(mpg == max(mpg)) %>% dplyr::slice(1),
label = "posterior predictions",
xlim = c(26, NA),
ylim = c(NA, 2.8),
point.padding = 0.3,
label.size = NA,
color = "gray50",
segment.color = "gray75") +
geom_label_repel(
data = df.cars %>% filter(cyl == "6") %>% filter(mpg == max(mpg)) %>% dplyr::slice(1),
label = "observed data",
xlim = c(26, NA),
ylim = c(2.2, NA),
point.padding = 0.2,
label.size = NA,
segment.color = "gray35")
Information about this R session including which version of R was used, and what packages were loaded.
sessionInfo()
R version 3.5.2 (2018-12-20)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Mojave 10.14.3
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] forcats_0.4.0 stringr_1.4.0 dplyr_0.8.0.1
[4] purrr_0.3.1 readr_1.3.1 tidyr_0.8.3
[7] tibble_2.0.1 tidyverse_1.2.1 bayesplot_1.6.0
[10] GGally_1.4.0 gganimate_1.0.2 ggrepel_0.8.0
[13] cowplot_0.9.4 rstanarm_2.18.2 brms_2.7.0
[16] ggplot2_3.1.0 Rcpp_1.0.0 tidybayes_1.0.4
[19] modelr_0.1.4 janitor_1.1.1 kableExtra_1.0.1
[22] knitr_1.22 styler_1.1.0.9000
loaded via a namespace (and not attached):
[1] readxl_1.3.0 backports_1.1.3
[3] Hmisc_4.2-0 plyr_1.8.4
[5] igraph_1.2.4 lazyeval_0.2.1
[7] splines_3.5.2 svUnit_0.7-12
[9] crosstalk_1.0.0 rstantools_1.5.1
[11] inline_0.3.15 digest_0.6.18
[13] htmltools_0.3.6 rsconnect_0.8.13
[15] checkmate_1.9.1 magrittr_1.5
[17] cluster_2.0.7-1 matrixStats_0.54.0
[19] xts_0.11-2 prettyunits_1.0.2
[21] colorspace_1.4-0 rvest_0.3.2
[23] haven_2.1.0 xfun_0.5
[25] callr_3.1.1 crayon_1.3.4
[27] jsonlite_1.6 lme4_1.1-21
[29] survival_2.43-3 zoo_1.8-4
[31] glue_1.3.1 gtable_0.2.0
[33] webshot_0.5.1 pkgbuild_1.0.2
[35] rstan_2.18.2 abind_1.4-5
[37] scales_1.0.0 mvtnorm_1.0-10
[39] miniUI_0.1.1.1 htmlTable_1.13.1
[41] viridisLite_0.3.0 xtable_1.8-3
[43] progress_1.2.0.9000 HDInterval_0.2.0
[45] ggstance_0.3.1 foreign_0.8-71
[47] Formula_1.2-3 stats4_3.5.2
[49] StanHeaders_2.18.1 DT_0.5
[51] htmlwidgets_1.3 httr_1.4.0
[53] threejs_0.3.1 arrayhelpers_1.0-20160527
[55] RColorBrewer_1.1-2 acepack_1.4.1
[57] pkgconfig_2.0.2 reshape_0.8.8
[59] loo_2.0.0 farver_1.1.0
[61] nnet_7.3-12 labeling_0.3
[63] tidyselect_0.2.5 rlang_0.3.1
[65] reshape2_1.4.3 later_0.8.0
[67] LaplacesDemon_16.1.1 munsell_0.5.0
[69] cellranger_1.1.0 tools_3.5.2
[71] cli_1.0.1 generics_0.0.2
[73] gifski_0.8.6 broom_0.5.1
[75] ggridges_0.5.1 evaluate_0.13
[77] yaml_2.2.0 processx_3.3.0
[79] nlme_3.1-137 mime_0.6
[81] xml2_1.2.0 compiler_3.5.2
[83] shinythemes_1.1.2 rstudioapi_0.9.0
[85] png_0.1-7 tweenr_1.0.1
[87] stringi_1.3.1 highr_0.7
[89] ps_1.3.0 Brobdingnag_1.2-6
[91] lattice_0.20-38 Matrix_1.2-15
[93] nloptr_1.2.1 markdown_0.9
[95] shinyjs_1.0 pillar_1.3.1
[97] bridgesampling_0.6-0 data.table_1.12.0
[99] httpuv_1.4.5.1 R6_2.4.0
[101] latticeExtra_0.6-28 bookdown_0.9
[103] promises_1.0.1 gridExtra_2.3
[105] codetools_0.2-16 boot_1.3-20
[107] colourpicker_1.0 MASS_7.3-51.1
[109] gtools_3.8.1 assertthat_0.2.0
[111] withr_2.1.2 shinystan_2.5.0
[113] parallel_3.5.2 hms_0.4.2
[115] rpart_4.1-13 grid_3.5.2
[117] coda_0.19-2 minqa_1.2.4
[119] snakecase_0.9.2 rmarkdown_1.11
[121] shiny_1.2.0 lubridate_1.7.4
[123] base64enc_0.1-3 dygraphs_1.1.1.6